Analysis date: 2020-01-10
library(plyr)
library(gtools)
library(openxlsx)
library(pheatmap)
library(reshape2)
library(progress)
library(Matrix)
library(Hmisc)
library(lemon)
library(ggpubr)
library(effsize)
library(ggbeeswarm)
library(ggfortify)
library(ggpmisc)
library(ggrepel)
library(readxl)
library(DESeq2)
library(TOSTER)
library(tidyverse)
library(vsn)
library(fdrtool)
library(limma)
library(apeglm)
library(IHW)
library(Rtsne)
library(biomartr)
library(biomaRt)
library(MultiAssayExperiment)
library(PMA)
library(gplots)
library(RColorBrewer)
library(grid)
library(ConsensusClusterPlus)
library(survival)
library(survminer)
library(cowplot)
mae_path <- "/Volumes/sd17b003/Sophie/Analysis/Screen_analysis/Robjects"
mae_path_Eva <- "/Volumes/sd17b003/Sophie/Eva/Master_thesis/data"
consensus_path <- "/Volumes/sd17b003/Sophie/Analysis/CLL_Proteomics"
#load(file.path(mae_path, "multiomics_MAE_full.RData"))
load(file.path(mae_path_Eva, "multiomics_MAE.RData"))
source("/Volumes/sd17b003/Sophie/Analysis/Screen_analysis/Figure_layouts.R")
prot_few_nas <- multiomics_MAE[["proteomics"]] %>% is.na() %>% rowSums()
prot_few_nas <- prot_few_nas[ prot_few_nas == 0 ] %>% names()
Var50 <- assay(multiomics_MAE[prot_few_nas , ,"proteomics"]) %>% rowVars(na.rm = TRUE) %>% sort(decreasing = TRUE) %>% .[50]
Var500 <- assay(multiomics_MAE[prot_few_nas , ,"proteomics"]) %>% rowVars(na.rm = TRUE) %>% sort(decreasing = TRUE) %>% .[500]
Var1000 <- assay(multiomics_MAE[prot_few_nas , ,"proteomics"]) %>% rowVars(na.rm = TRUE) %>% sort(decreasing = TRUE) %>% .[1000]
#ensembl=useMart("ensembl")
#ensembl = useDataset("hsapiens_gene_ensembl",mart=ensembl)
#genemap <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol", "start_position", "end_position" , "chromosome_name", "description"),
# filters = "hgnc_symbol", values = (prot_few_nas %>% unique), mart = ensembl)
#genemap <- genemap %>% as_tibble() %>% mutate(mean_position=(start_position + end_position)/2)
load("/Volumes/sd17b003/Sophie/Analysis/CLL_Proteomics/R_objects/ensembl_proteins_location.RData")
metadata(multiomics_MAE)$gene_symbol_mapping <- left_join(metadata(multiomics_MAE)$gene_symbol_mapping, genemap)
proteomics_tbl_meta_biomart_chrab <- wideFormat(multiomics_MAE[,,"chrom_abber"]) %>% as_tibble()
#proteomics_tbl_meta_biomart_chrab[, -1] <- proteomics_tbl_meta_biomart_chrab[, -1] > 20
proteomics_tbl_meta_biomart_chrab <- mutate_if(proteomics_tbl_meta_biomart_chrab, is.numeric, as.logical)
proteomics_tbl_meta_biomart_chrab <- proteomics_tbl_meta_biomart_chrab %>% dplyr::select(primary,
colnames(proteomics_tbl_meta_biomart_chrab[, -1])[
apply(proteomics_tbl_meta_biomart_chrab[, -1] ,2, function(cl){ all(table(cl)>2) } )
] )
proteomics_tbl_meta_biomart_chrab <- proteomics_tbl_meta_biomart_chrab %>% dplyr::select(primary,
colnames(proteomics_tbl_meta_biomart_chrab[, -1])[
apply(proteomics_tbl_meta_biomart_chrab[, -1] ,2, function(cl){ length(table(cl))>1 } )
] )
proteomics_tbl_meta_biomart_SNP <- wideFormat(multiomics_MAE[,,"SNPs"]) %>% as_tibble()
#proteomics_tbl_meta_biomart_SNP[, -1] <- (proteomics_tbl_meta_biomart_SNP[, -1] == 1)
proteomics_tbl_meta_biomart_SNP <- mutate_if(proteomics_tbl_meta_biomart_SNP, is.numeric, as.logical)
proteomics_tbl_meta_biomart_SNP <- proteomics_tbl_meta_biomart_SNP %>% dplyr::select(primary,
colnames(proteomics_tbl_meta_biomart_SNP[, -1])[
apply(proteomics_tbl_meta_biomart_SNP[, -1] ,2, function(cl){ all(table(cl)>2) } )
] )
proteomics_tbl_meta_biomart_SNP <- proteomics_tbl_meta_biomart_SNP %>% dplyr::select(primary,
colnames(proteomics_tbl_meta_biomart_SNP[, -1])[
apply(proteomics_tbl_meta_biomart_SNP[, -1] ,2, function(cl){ length(table(cl))>1 } )
] )
proteomics_tbl_meta_biomart_health <- wideFormat(multiomics_MAE[,,"health_record_bin"]) %>% as_tibble() %>% dplyr::select(primary, health_record_bin_IGHV_mutated, health_record_bin_elderly_at_diagnosis, health_record_bin_treated)
proteomics_tbl_meta_biomart_health <- mutate_if(proteomics_tbl_meta_biomart_health, is.numeric, as.logical)
proteomics_tbl_meta_biomart <- left_join(
left_join((longFormat(multiomics_MAE[,,"proteomics"], colDataCols = c("trisomy12", "IGHV") ) %>% as_tibble() %>%
mutate(IGHV= if_else(IGHV %in% c("M", "U"), IGHV, "NA") ) ),
metadata(multiomics_MAE)$gene_symbol_mapping[c(2,3,5:7)],
by=c("rowname"="hgnc_symbol")),
proteomics_tbl_meta_biomart_chrab, by=c("primary"))
proteomics_tbl_meta_biomart <- left_join( left_join(proteomics_tbl_meta_biomart,
proteomics_tbl_meta_biomart_SNP, by=c("primary")),
proteomics_tbl_meta_biomart_health, by=c("primary"))
drug_and_proteomics_prot <- longFormat(multiomics_MAE[,,c("proteomics")]) %>% as_tibble()
drug_and_proteomics_drug <- longFormat(multiomics_MAE[,,c("drug_resp_mono")]) %>% as_tibble()
drug_and_proteomics_drug <- drug_and_proteomics_drug %>% separate(rowname, into = c("Drug", "conc"), sep = "_") %>%
group_by(assay, primary, rowname=Drug, colname ) %>%
dplyr::summarise(value=mean(value, na.rm=TRUE)) %>% ungroup()
drug_and_proteomics <- bind_rows(drug_and_proteomics_prot, drug_and_proteomics_drug)
pats_drug_and_prot <- drug_and_proteomics %>% group_by(primary) %>% dplyr::summarise(nassay=length(unique(assay))) %>% dplyr::filter(nassay==2) %>% .$primary
drug_and_proteomics <- drug_and_proteomics %>% dplyr::filter(primary %in% pats_drug_and_prot)
all_prot <- drug_and_proteomics %>% dplyr::filter(assay=="proteomics", rowname %in% prot_few_nas) %>% .$rowname %>% unique()
all_drugs <- drug_and_proteomics %>% dplyr::filter(assay=="drug_resp_mono") %>% .$rowname %>% unique()
prot50 <- rownames(assay(multiomics_MAE[prot_few_nas , ,"proteomics"])[(assay(multiomics_MAE[prot_few_nas , ,"proteomics"]) %>% rowVars(na.rm = TRUE)) > Var50, ])
prot500 <- rownames(assay(multiomics_MAE[prot_few_nas , ,"proteomics"])[(assay(multiomics_MAE[prot_few_nas , ,"proteomics"]) %>% rowVars(na.rm = TRUE)) > Var500, ])
prot1000 <- rownames(assay(multiomics_MAE[prot_few_nas , ,"proteomics"])[(assay(multiomics_MAE[prot_few_nas , ,"proteomics"]) %>% rowVars(na.rm = TRUE)) > Var1000, ])
pat_overlap_prot_RNA <- colnames(intersectColumns(multiomics_MAE[,,c("proteomics", "RNAseq_norm")]))[["proteomics"]]
BCR_genes <- read_excel(path = "/Users/sophierabe/Desktop/PhD/Bioinfo/Data/KEGG_BCellReceptorSignalingPathway.xlsx")
splice_genes <-read_table(file = "/Users/sophierabe/Desktop/PhD/Bioinfo/Data/KEGG_Spliceosome.txt", skip = 2, col_names = "symbol")
## Parsed with column specification:
## cols(
## symbol = col_character()
## )
proteomics_tbl_meta_biomart
as_tibble(colData(multiomics_MAE))
assay(multiomics_MAE[prot_few_nas , ,"proteomics"])[1:5,1:5]
## OMZP0012 OMZP0042 OMZP0062 OMZP0094 OMZP0105
## A2M -0.23106599 -0.123775317 -0.20324611 -0.11422224 0.05369119
## AAAS -0.04446032 -0.003929938 -0.10881390 0.21728422 -0.24019198
## AACS 0.18091637 -0.469318906 0.01926965 0.46120353 0.27485862
## AAGAB 0.11577450 0.129642752 0.10785538 -0.06247763 0.25647801
## AAMDC 0.21443028 -0.003820597 -0.09936349 0.62120261 -0.26938714
dim(assay(multiomics_MAE[prot_few_nas , ,"proteomics"]))
## [1] 7311 68
print(paste( round( (!(assay(multiomics_MAE[,,"proteomics"]) %>% is.na() ) ) %>% colSums() %>% mean) , "proteins per sample were detected on average in proteomics data"))
## [1] "7311 proteins per sample were detected on average in proteomics data"
print(paste( round( (!(assay(multiomics_MAE[,,"RNAseq_full"]) %>% is.na() ) ) %>%
colSums() %>% mean) ,
"transcipts per sample were detected on average in RNASeq data"))
## [1] "63214 transcipts per sample were detected on average in RNASeq data"
if(!any(is.na(assay(multiomics_MAE[,,"RNAseq_full"])))){
print("No NAs in RNASeq dataset")
}
## [1] "No NAs in RNASeq dataset"
print( paste( ( (assay(multiomics_MAE[,,"RNAseq_full"]) %>% rowMeans() ) > 0 ) %>% sum , "different transcripts were detected"))
## [1] "40083 different transcripts were detected"
print(
paste(
multiomics_MAE@metadata$gene_symbol_mapping %>%
filter(ensembl_gene_id %in%
names((assay(multiomics_MAE[,,"RNAseq_full"]) %>% rowMeans() ) > 0 ) ) %>%
dplyr::select(hgnc_symbol) %>% unique %>%
filter(hgnc_symbol != "") %>%
nrow,
"transcripts with unique hgnc symbols"))
## [1] "35402 transcripts with unique hgnc symbols"
print( paste(
(multiomics_MAE@metadata$gene_symbol_mapping %>%
filter(ensembl_gene_id %in%
names((assay(multiomics_MAE[,,"RNAseq_full"]) %>% rowMeans() ) > 0 ) ) %>%
dplyr::select(hgnc_symbol) %>% unique %>%
filter(hgnc_symbol != "") %>% .$hgnc_symbol %in%
rownames(multiomics_MAE@ExperimentList$proteomics) ) %>% sum,
"matching proteins and transcripts detected"))
## [1] "7199 matching proteins and transcripts detected"
protpats <- multiomics_MAE@sampleMap %>% as_tibble() %>% filter(assay=="proteomics") %>% .$primary %>% unique
message("Available datasets for proteomics patients:")
multiomics_MAE[,protpats,]@sampleMap %>% .$assay %>% table
## .
## SNPs drug_resp_mono drug_resp_co
## 67 68 68
## chrom_abber health_record_bin health_record_scaled
## 68 68 68
## proteomics RNAseq_full RNAseq_norm
## 68 59 59
order_oncoplot <- longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>%
as_tibble() %>%
group_by(rowname) %>%
summarise(total=sum(value, na.rm = TRUE)) %>%
arrange(total ) %>%
.$rowname
tmp <- wideFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>% as_tibble()
colnames(tmp) <- colnames(tmp) %>% gsub("SNPs_|chrom_abber_", "",.)
#tmp %>% arrange( desc( del13q14 ) )
for(l in 1:length(order_oncoplot)){
tmp <- tmp %>% arrange_( as.name(order_oncoplot[l] ) )
}
onco_center <-
longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>%
as_tibble() %>%
mutate(alteration= if_else(value==1, assay, as.character(value))) %>%
ggplot(aes(primary, rowname, fill=alteration)) +
geom_tile(color="black") +
scale_fill_manual(values=c("white", "orange1", "#ca0020", "grey"), labels=c("wt", "CNV", "SNV", "NA" ), na.translate=FALSE) +
#pp_sra_noguides_tilted +
scale_y_discrete(limits=order_oncoplot) +
scale_x_discrete(limits=rev(tmp$primary)) +
theme(panel.background = element_rect(fill= "darkgrey"), panel.grid = element_blank(),
axis.text.x = element_blank(), axis.title = element_blank(),
aspect.ratio = 1, axis.ticks.x = element_blank(), legend.position = "bottom") +
guides(fill=guide_legend(title = NULL ))
onco_right <-longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>%
as_tibble() %>%
ggplot(aes(rowname, value)) + geom_col(aes(fill=assay)) +
scale_x_discrete(limits=order_oncoplot) +
coord_flip()+
scale_fill_manual(values=c("orange1", "#ca0020")) +
theme(panel.background = element_rect(fill= "white"), panel.grid = element_blank(), axis.title = element_blank(),
aspect.ratio = 1, axis.line.x = element_line(color="black"))
onco_right_perc <- longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>% as_tibble() %>%
group_by(rowname) %>%
dplyr::summarise(perc=round(sum(value, na.rm = TRUE)/ sum(!is.na(value)) , 2) ) %>%
ggplot(aes(rowname, 1, label=paste0(perc*100, "%") )) +
geom_text() + coord_flip() + theme_void() +
scale_x_discrete(limits=order_oncoplot)
onco_right_total <-
longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>% as_tibble() %>%
group_by(rowname) %>%
dplyr::summarise(total=sum(value, na.rm = TRUE )) %>%
ggplot(aes(rowname, 1, label=total)) +
geom_text() + coord_flip() + theme_void() +
scale_x_discrete(limits=order_oncoplot)
onco_top <-longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>%
as_tibble() %>%
ggplot(aes(primary, value)) + geom_col(aes(fill=assay)) +
scale_x_discrete(limits=rev(tmp$primary)) +
scale_fill_manual(values=c("orange1", "#ca0020")) +
theme(panel.background = element_rect(fill= "white"), panel.grid = element_blank(), axis.title = element_blank(),
aspect.ratio = 1, axis.line.x = element_line(color="black"))
p1<- insert_yaxis_grob(onco_center,get_panel(onco_right_total) , grid::unit(.2, "null"), position = "right")
p1.2<- insert_yaxis_grob(p1, get_panel(onco_right), grid::unit(.1, "null"), position = "right")
p1.3<- insert_yaxis_grob(p1.2, get_panel(onco_right_perc), grid::unit(.1, "null"), position = "right")
oncoplot <- insert_xaxis_grob(p1.3, onco_top, grid::unit(.2, "null"), position = "top")
ggdraw(oncoplot)
drs_sel <- metadata(multiomics_MAE)$drugs_functional_groups %>%
filter( !( grepl("\\+|DMSO|SSZ|phenyleth|Oxaliplatin|Hydroxychloroquine|Obatoclax mesylate|Vitamin", Drug) ) )
order_drugs <- drs_sel$Pathways %>% table() %>% as_tibble() %>% arrange(desc(n)) %>% .$.
drug_nrs <- drs_sel %>%
ggplot(aes(Pathways)) + geom_bar() +
pp_sra_noguides_tilted +
scale_x_discrete(limits=order_drugs)
drug_nrs
#plot_grid(oncoplot, drug_nrs, align = "v", axis="t")
(!is.na(multiomics_MAE@ExperimentList$proteomics)) %>%
colSums() %>%
enframe(name = "Patients", value = "Nr. of proteins") %>%
ggplot(aes(Patients, `Nr. of proteins`)) + geom_col() + pp_sra + theme(axis.text.x = element_blank())
save(all_drugs,
BCR_genes, splice_genes,
drug_and_proteomics, drug_and_proteomics_drug,
multiomics_MAE,
pat_overlap_prot_RNA,
prot_few_nas,
prot1000, prot500, prot50,
proteomics_tbl_meta_biomart, proteomics_tbl_meta_biomart_chrab, proteomics_tbl_meta_biomart_health, proteomics_tbl_meta_biomart_SNP,
Var1000, Var500, Var50,
file="/Volumes/sd17b003/Sophie/Analysis/CLL_Proteomics/CLL_Proteomics_final/Proteomics_Git/Robjects/CLL_Proteomics_Setup.RData")
sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] cowplot_1.0.0 survminer_0.4.6
## [3] ConsensusClusterPlus_1.46.0 RColorBrewer_1.1-2
## [5] gplots_3.0.1.1 PMA_1.1
## [7] MultiAssayExperiment_1.8.3 biomaRt_2.38.0
## [9] biomartr_0.9.0 Rtsne_0.15
## [11] IHW_1.10.1 apeglm_1.4.2
## [13] limma_3.38.3 fdrtool_1.2.15
## [15] vsn_3.50.0 forcats_0.4.0
## [17] stringr_1.4.0 dplyr_0.8.3
## [19] purrr_0.3.3 readr_1.3.1
## [21] tidyr_1.0.0 tibble_2.1.3
## [23] tidyverse_1.3.0 TOSTER_0.3.4
## [25] DESeq2_1.22.2 SummarizedExperiment_1.12.0
## [27] DelayedArray_0.8.0 BiocParallel_1.16.6
## [29] matrixStats_0.55.0 Biobase_2.42.0
## [31] GenomicRanges_1.34.0 GenomeInfoDb_1.18.2
## [33] IRanges_2.16.0 S4Vectors_0.20.1
## [35] BiocGenerics_0.28.0 readxl_1.3.1
## [37] ggrepel_0.8.1 ggpmisc_0.3.3
## [39] ggfortify_0.4.8 ggbeeswarm_0.6.0
## [41] effsize_0.7.6 ggpubr_0.2.4
## [43] magrittr_1.5 lemon_0.4.3
## [45] Hmisc_4.3-0 ggplot2_3.2.1
## [47] Formula_1.2-3 survival_3.1-8
## [49] lattice_0.20-38 Matrix_1.2-18
## [51] progress_1.2.2 reshape2_1.4.3
## [53] pheatmap_1.0.12 openxlsx_4.1.4
## [55] gtools_3.8.1 plyr_1.8.4
##
## loaded via a namespace (and not attached):
## [1] backports_1.1.5 lazyeval_0.2.2 splines_3.5.2
## [4] lpsymphony_1.10.0 digest_0.6.23 htmltools_0.4.0
## [7] gdata_2.18.0 fansi_0.4.0 checkmate_1.9.4
## [10] memoise_1.1.0 cluster_2.1.0 Biostrings_2.50.2
## [13] annotate_1.60.1 modelr_0.1.5 prettyunits_1.0.2
## [16] colorspace_1.4-1 blob_1.2.0 rvest_0.3.5
## [19] haven_2.2.0 xfun_0.11 crayon_1.3.4
## [22] RCurl_1.95-4.12 jsonlite_1.6 genefilter_1.64.0
## [25] impute_1.56.0 zeallot_0.1.0 zoo_1.8-6
## [28] glue_1.3.1 gtable_0.3.0 zlibbioc_1.28.0
## [31] XVector_0.22.0 scales_1.1.0 DBI_1.0.0
## [34] Rcpp_1.0.3 emdbook_1.3.11 xtable_1.8-4
## [37] htmlTable_1.13.3 foreign_0.8-72 bit_1.1-14
## [40] km.ci_0.5-2 preprocessCore_1.44.0 htmlwidgets_1.5.1
## [43] httr_1.4.1 ellipsis_0.3.0 acepack_1.4.1
## [46] pkgconfig_2.0.3 XML_3.98-1.20 nnet_7.3-12
## [49] dbplyr_1.4.2 locfit_1.5-9.1 tidyselect_0.2.5
## [52] rlang_0.4.2 AnnotationDbi_1.44.0 munsell_0.5.0
## [55] cellranger_1.1.0 tools_3.5.2 cli_2.0.0
## [58] generics_0.0.2 RSQLite_2.1.4 broom_0.5.2
## [61] evaluate_0.14 yaml_2.2.0 knitr_1.26
## [64] bit64_0.9-7 fs_1.3.1 zip_2.0.4
## [67] survMisc_0.5.5 caTools_1.17.1.3 nlme_3.1-142
## [70] slam_0.1-46 xml2_1.2.2 compiler_3.5.2
## [73] rstudioapi_0.10 curl_4.3 beeswarm_0.2.3
## [76] affyio_1.52.0 ggsignif_0.6.0 reprex_0.3.0
## [79] geneplotter_1.60.0 stringi_1.4.3 KMsurv_0.1-5
## [82] vctrs_0.2.0 pillar_1.4.2 lifecycle_0.1.0
## [85] BiocManager_1.30.10 data.table_1.12.8 bitops_1.0-6
## [88] R6_2.4.1 latticeExtra_0.6-28 affy_1.60.0
## [91] KernSmooth_2.23-16 gridExtra_2.3 vipor_0.4.5
## [94] MASS_7.3-51.4 assertthat_0.2.1 withr_2.1.2
## [97] GenomeInfoDbData_1.2.0 hms_0.5.2 rpart_4.1-15
## [100] coda_0.19-3 rmarkdown_1.18 bbmle_1.0.20
## [103] numDeriv_2016.8-1.1 lubridate_1.7.4 base64enc_0.1-3